In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import joypy
import seaborn as sns
sns.set()
import os
import matplotlib.cm as cm
from matplotlib.cm import ScalarMappable
from matplotlib.lines import Line2D
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from textwrap import wrap

world_champions_file = 'worldChampions.csv'
world_champions = pd.read_csv(world_champions_file)
world_champions = world_champions.drop(columns=['Unnamed: 0'])
ordered_world_champions = world_champions.sort_values(by='WorldTitles')
category_codes = pd.Categorical(ordered_world_champions['WorldTitles']).codes
colormap = ["#154500", "#7db208", "#bcdd7a", "#acdffe", "#4e9bf8", "#0b2b81"]
colors = ['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'gold', 'grey', 'grey', 'grey', 'darkorange',
          'mediumblue', 'limegreen', 'red', 'turquoise']

title_max = np.max(ordered_world_champions['WorldTitles'])
title_min = np.min(ordered_world_champions['WorldTitles'])

def scale_to_interval(x, low=10, high=70):
    return ((x - title_min) / (title_max - title_min)) * (high - low) + low

category_codes = pd.Categorical(ordered_world_champions['WorldTitles']).codes
colormap = ["#154500", "#7db208", "#bcdd7a", "#acdffe", "#4e9bf8", "#0b2b81"]
colors = ['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
          'grey', 'orchid', 'grey', 'grey', 'grey', 'darkorange',
          'mediumblue', 'limegreen', 'red', 'turquoise']

angles = np.linspace(0, 2 * np.pi, len(ordered_world_champions['WorldTitles']), endpoint=False)
heights = np.array(ordered_world_champions['WorldTitles'])

plus = 15

fig, ax = plt.subplots(figsize=(25, 25), subplot_kw={"projection": "polar"})
fig.patch.set_facecolor("white")
ax.set_facecolor("white")
ax.set_theta_offset(np.pi / 2)
ax.set_theta_direction(-1)
ax.vlines(angles, 0 + plus, heights + plus, color=colors, lw=2)
ax.scatter(angles, heights + plus, color=colors)
ax.spines["start"].set_color("none")
ax.spines["polar"].set_color("none")
ax.grid(False)
ax.set_xticks([])
ax.set_yticklabels([])

grey1 = "#d6d6d6"
grey2 = "#cccccc"
grey3 = "#c1c1c1"
grey4 = "#b7b7b7"
grey5 = "#adadad"
grey6 = "#a3a3a3"
grey7 = "#999999"

hangles = np.linspace(0, 2 * np.pi, 200)
ax.plot(hangles, np.repeat(plus, 200), color=grey1, lw=0.7)
ax.plot(hangles, np.repeat(1 + plus, 200), color=grey1, lw=0.7)
ax.plot(hangles, np.repeat(2 + plus, 200), color=grey2, lw=0.7)
ax.plot(hangles, np.repeat(3 + plus, 200), color=grey3, lw=0.7)
ax.plot(hangles, np.repeat(4 + plus, 200), color=grey4, lw=0.7)
ax.plot(hangles, np.repeat(5 + plus, 200), color=grey5, lw=0.7)
ax.plot(hangles, np.repeat(6 + plus, 200), color=grey6, lw=0.7)
ax.plot(hangles, np.repeat(7 + plus, 200), color=grey7, lw=0.7)

for angle, height, driver, titles in zip(angles, heights, ordered_world_champions['Driver'], ordered_world_champions['WorldTitles']):
    ax.text(angle, height + plus, f"{driver} ({titles})", ha='left', va='center', 
            fontsize=12, fontstyle='italic', fontfamily='serif', color='black')

    
import os
directory = 'images'
if not os.path.exists(directory):
    os.makedirs(directory)
fig.savefig(f'{directory}/WorldChampions.svg', format='svg', dpi=1200, facecolor='w', bbox_inches='tight', pad_inches=0)
/Users/sainishanth/anaconda3/lib/python3.10/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.5' currently installed).
  from pandas.core import (
In [2]:
world_champions_file = 'worldChampions.csv'
world_champions = pd.read_csv(world_champions_file)
print(world_champions)
    Unnamed: 0              Driver  WorldTitles
0            0         Nino Farina            1
1            1  Juan Manuel Fangio            5
2            2      Alberto Ascari            2
3            3       Mike Hawthorn            1
4            4        Jack Brabham            3
5            5           Phil Hill            1
6            6         Graham Hill            2
7            7           Jim Clark            2
8            8        John Surtees            1
9            9         Denny Hulme            1
10          10      Jackie Stewart            3
11          11        Jochen Rindt            1
12          12  Emerson Fittipaldi            2
13          13          Niki Lauda            3
14          14          James Hunt            1
15          15      Mario Andretti            1
16          16      Jody Scheckter            1
17          17          Alan Jones            1
18          18       Nelson Piquet            3
19          19        Keke Rosberg            1
20          20         Alain Prost            4
21          21        Ayrton Senna            3
22          22       Nigel Mansell            1
23          23  Michael Schumacher            7
24          24          Damon Hill            1
25          25  Jacques Villeneuve            1
26          26       Mika Häkkinen            2
27          27     Fernando Alonso            2
28          28      Kimi Räikkönen            1
29          29      Lewis Hamilton            7
30          30       Jenson Button            1
31          31    Sebastian Vettel            4
32          32        Nico Rosberg            1
33          33      Max Verstappen            1
In [3]:
import pandas as pd
import altair as alt

# Assuming you've already loaded the data into 'world_champions' dataframe

# Define the heatmap chart
heatmap = alt.Chart(world_champions).mark_rect().encode(
    x=alt.X('Driver:N', title='Driver', sort=None),  # Sort by original order
    y=alt.Y('WorldTitles:O', title='World Titles', sort=None),  # Sort by original order
    color=alt.Color('WorldTitles:Q', title='World Titles', scale=alt.Scale(scheme='viridis'))
).properties(
    width=600,
    height=300,
    title='World Championships by Driver'
)

# Add text labels
text = heatmap.mark_text(baseline='middle').encode(
    text='WorldTitles:Q',
    color=alt.condition(
        alt.datum.WorldTitles > 3,
        alt.value('white'),
        alt.value('black')
    )
)

# Combine the heatmap and text
chart = (heatmap + text).configure_axis(
    labelFontSize=12,
    titleFontSize=14
)

# Show the chart
chart
Out[3]:
In [4]:
import pandas as pd
from plotnine import *
import numpy as np

results = ('results.csv')
results = pd.read_csv(results)

races = ('races.csv')
races = pd.read_csv(races)

drivers = ('drivers.csv')
drivers = pd.read_csv(drivers)

current_point_system = ('currPointSystem.csv')
current_point_system = pd.read_csv(current_point_system)

data_driver_1 = results[results['driverId'] == 1]
data_driver_30 = results[results['driverId'] == 30]
data_driver_579 = results[results['driverId'] == 579]
data_driver_117 = results[results['driverId'] == 117]
data_driver_20 = results[results['driverId'] == 20]
data_driver_101 = results[results['driverId'] == 101]
data_driver_328 = results[results['driverId'] == 328]
data_driver_182 = results[results['driverId'] == 182]
data_driver_37 = results[results['driverId'] == 37]
data_driver_102 = results[results['driverId'] == 102]
data_driver_647 = results[results['driverId'] == 647]
data_driver_373 = results[results['driverId'] == 373]
data_driver_289 = results[results['driverId'] == 289]
data_driver_224 = results[results['driverId'] == 224]
data_driver_57 = results[results['driverId'] == 57]
data_driver_4 = results[results['driverId'] == 4]
data_driver_642 = results[results['driverId'] == 642]
data_driver_578 = results[results['driverId'] == 578]
data_driver_403 = results[results['driverId'] == 403]
data_driver_341 = results[results['driverId'] == 341]
data_driver_304 = results[results['driverId'] == 304]
data_driver_358 = results[results['driverId'] == 358]
data_driver_231 = results[results['driverId'] == 231]
data_driver_207 = results[results['driverId'] == 207]
data_driver_222 = results[results['driverId'] == 222]
data_driver_178 = results[results['driverId'] == 178]
data_driver_177 = results[results['driverId'] == 177]
data_driver_3 = results[results['driverId'] == 3]
data_driver_71 = results[results['driverId'] == 71]
data_driver_95 = results[results['driverId'] == 95]
data_driver_35 = results[results['driverId'] == 35]
data_driver_8 = results[results['driverId'] == 8]
data_driver_18 = results[results['driverId'] == 18]
data_driver_830 = results[results['driverId'] == 830]

frames_driver = [data_driver_1, data_driver_30, data_driver_579, data_driver_117, 
                 data_driver_20, data_driver_101, data_driver_328, data_driver_182,
                 data_driver_37, data_driver_102, data_driver_647, data_driver_373, 
                 data_driver_289, data_driver_224, data_driver_57, data_driver_4, 
                 data_driver_642, data_driver_578, data_driver_403, data_driver_341, 
                 data_driver_304, data_driver_358, data_driver_231, data_driver_207,
                 data_driver_222, data_driver_178, data_driver_177, data_driver_3, 
                 data_driver_71, data_driver_95, data_driver_35, data_driver_8, 
                 data_driver_18, data_driver_830]
result_driver = pd.concat(frames_driver)


idPil_race = result_driver.join(races.set_index('raceId'), on='raceId', lsuffix='caller', rsuffix='other')

idPil_race_driverName = idPil_race.join(drivers.set_index('driverId'), on='driverId', lsuffix='caller', rsuffix='other')

idPil_race_driverName['forename'] = idPil_race_driverName['forename'] + " " + idPil_race_driverName['surname']
idPil_race_driverName = idPil_race_driverName.drop(columns=['surname'], axis=1)

idPil_race_driverName['dob'] = pd.to_datetime(idPil_race_driverName['dob'], format='%Y-%m-%d')
idPil_race_driverName['date'] = pd.to_datetime(idPil_race_driverName['date'], format='%Y-%m-%d')
idPil_race_driverName['driver_age'] = ((idPil_race_driverName['date'] - idPil_race_driverName['dob']).dt.days) / 365.25
current_point_system['positionOrder'] = current_point_system['positionOrder'].astype('str')
idPil_race_driverName = idPil_race_driverName.join(current_point_system.set_index('positionOrder'), on='position', lsuffix='caller', rsuffix='other')
idPil_race_driverName = idPil_race_driverName.drop(columns=['raceId', 'constructorId', 'driverId', 'name', 'numbercaller', 'pointscaller', 'grid', 'position', 'positionText', 'positionOrder', 'laps', 'timecaller', 'milliseconds', 'fastestLap', 'rank', 'fastestLapTime', 'fastestLapSpeed', 'statusId', 'year', 'round', 'circuitId', 'date', 'timeother', 'urlcaller', 'driverRef', 'numberother', 'code', 'dob', 'nationality', 'urlother'], axis=1)
idPil_race_driverName = idPil_race_driverName.reset_index(drop=True)
idPil_race_driverName.loc[0:287, ['forename']] = 'ZZZLewis Hamilton'
idPil_race_driverName.loc[288:595, ['forename']] = 'ZZZMichael Schumacher'
idPil_race_driverName.loc[596:653, ['forename']] = 'ZZZJuan Fangio'
idPil_race_driverName.loc[654:855, ['forename']] = 'ZZZAlain Prost'
idPil_race_driverName.loc[856:1135, ['forename']] = 'ZZZSebastian Vettel'
idPil_race_driverName.loc[1547:1708, ['forename']] = 'ZZZAyrton Senna'

idPil_race_driverName = idPil_race_driverName.sort_values(['driver_age'], ascending=True)
idPil_race_driverName['pointsother'] = idPil_race_driverName['pointsother'].replace(np.nan, 0)
idPil_race_driverName['cumulative_points'] = idPil_race_driverName.groupby('forename')['pointsother'].cumsum()

fig1 = (ggplot(idPil_race_driverName, aes(x='driver_age', y='cumulative_points')) +
        scale_x_continuous(labels=['20 years', '30 years', '40 years', '50 years']) +
        geom_line(aes(group='forename', color='forename', size='forename')) +
        scale_color_manual(values=['grey', 'grey', 'grey', 'grey', 'grey', 'grey',
                                   'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
                                   'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
                                   'grey', 'grey', 'grey', 'grey', 'grey', 'grey',
                                   'grey', 'grey', 'grey', 'grey', 'darkorange', 'orchid',
                                   'limegreen', 'turquoise', 'red', 'mediumblue']) +
        scale_size_manual(values=[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
                                   0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 1.2, 1.2, 1.2, 1.2, 1.2, 1.2]) +
        theme_minimal() +  # or theme_bw() for a plain white background with grid lines
        theme(panel_background=element_rect(fill="white", color="white")) +
        theme(panel_grid_major=element_blank(), panel_grid_minor=element_blank()) +
        theme(legend_position=('none')) +
        theme(figure_size=(17, 12))
        )

print(fig1)


fig1.save(filename=f'{directory}/ChampionsPoints.png', dpi=1200, width=17, height=12)
/var/folders/09/z4b8vf1j5gnb4sqltdm7hxh80000gn/T/ipykernel_6436/2551521217.py:107: FutureWarning: Using print(plot) to draw and show the plot figure is deprecated and will be removed in a future version. Use plot.show().

/Users/sainishanth/anaconda3/lib/python3.10/site-packages/plotnine/ggplot.py:606: PlotnineWarning: Saving 17 x 12 in image.
/Users/sainishanth/anaconda3/lib/python3.10/site-packages/plotnine/ggplot.py:607: PlotnineWarning: Filename: images/ChampionsPoints.png
In [5]:
results = ('results.csv')
results = pd.read_csv(results)

races = ('races.csv')
races = pd.read_csv(races)

drivers = ('drivers.csv')
drivers = pd.read_csv(drivers)


print(results)
print(races)
print(drivers)
       resultId  raceId  driverId  constructorId number  grid position  \
0             1      18         1              1     22     1        1   
1             2      18         2              2      3     5        2   
2             3      18         3              3      7     7        3   
3             4      18         4              4      5    11        4   
4             5      18         5              1     23     3        5   
...         ...     ...       ...            ...    ...   ...      ...   
25394     25400    1073       815              9     11     4       15   
25395     25401    1073       849              3      6    16       \N   
25396     25402    1073       841             51     99    14       \N   
25397     25403    1073       847              3     63    17       \N   
25398     25404    1073         8             51      7    18       \N   

      positionText  positionOrder  points  laps         time milliseconds  \
0                1              1    10.0    58  1:34:50.616      5690616   
1                2              2     8.0    58       +5.478      5696094   
2                3              3     6.0    58       +8.163      5698779   
3                4              4     5.0    58      +17.181      5707797   
4                5              5     4.0    58      +18.014      5708630   
...            ...            ...     ...   ...          ...          ...   
25394           15             15     0.0    55           \N           \N   
25395            R             16     0.0    50           \N           \N   
25396            R             17     0.0    33           \N           \N   
25397            R             18     0.0    26           \N           \N   
25398            R             19     0.0    25           \N           \N   

      fastestLap rank fastestLapTime fastestLapSpeed  statusId  
0             39    2       1:27.452         218.300         1  
1             41    3       1:27.739         217.586         1  
2             41    5       1:28.090         216.719         1  
3             58    7       1:28.603         215.464         1  
4             43    1       1:27.418         218.385         1  
...          ...  ...            ...             ...       ...  
25394         51    2       1:26.419         219.993         5  
25395         30   15       1:29.293         212.912         3  
25396         33   16       1:29.442         212.557         6  
25397         23   19       1:30.647         209.732         6  
25398         23   18       1:29.698         211.951        23  

[25399 rows x 18 columns]
      raceId  year  round  circuitId                      name        date  \
0          1  2009      1          1     Australian Grand Prix  2009-03-29   
1          2  2009      2          2      Malaysian Grand Prix  2009-04-05   
2          3  2009      3         17        Chinese Grand Prix  2009-04-19   
3          4  2009      4          3        Bahrain Grand Prix  2009-04-26   
4          5  2009      5          4        Spanish Grand Prix  2009-05-10   
...      ...   ...    ...        ...                       ...         ...   
1075    1092  2022     19         22       Japanese Grand Prix  2022-10-09   
1076    1093  2022     20         69  United States Grand Prix  2022-10-23   
1077    1094  2022     21         32    Mexico City Grand Prix  2022-10-30   
1078    1095  2022     22         18      São Paulo Grand Prix  2022-11-13   
1079    1096  2022     23         24      Abu Dhabi Grand Prix  2022-11-20   

          time                                                url  
0     06:00:00  http://en.wikipedia.org/wiki/2009_Australian_G...  
1     09:00:00  http://en.wikipedia.org/wiki/2009_Malaysian_Gr...  
2     07:00:00  http://en.wikipedia.org/wiki/2009_Chinese_Gran...  
3     12:00:00  http://en.wikipedia.org/wiki/2009_Bahrain_Gran...  
4     12:00:00  http://en.wikipedia.org/wiki/2009_Spanish_Gran...  
...        ...                                                ...  
1075  05:10:00  https://en.wikipedia.org/wiki/2022_Japanese_Gr...  
1076  19:00:00  https://en.wikipedia.org/wiki/2022_United_Stat...  
1077  19:00:00  https://en.wikipedia.org/wiki/2022_Mexican_Gra...  
1078  17:00:00  https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...  
1079  13:00:00  https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...  

[1080 rows x 8 columns]
     driverId        driverRef number code  forename     surname         dob  \
0           1         hamilton     44  HAM     Lewis    Hamilton  1985-01-07   
1           2         heidfeld     \N  HEI      Nick    Heidfeld  1977-05-10   
2           3          rosberg      6  ROS      Nico     Rosberg  1985-06-27   
3           4           alonso     14  ALO  Fernando      Alonso  1981-07-29   
4           5       kovalainen     \N  KOV    Heikki  Kovalainen  1981-10-19   
..        ...              ...    ...  ...       ...         ...         ...   
849       851           aitken     89  AIT      Jack      Aitken  1995-09-23   
850       852          tsunoda     22  TSU      Yuki     Tsunoda  2000-05-11   
851       853          mazepin      9  MAZ    Nikita     Mazepin  1999-03-02   
852       854  mick_schumacher     47  MSC      Mick  Schumacher  1999-03-22   
853       855             zhou     \N  ZHO    Guanyu        Zhou  1999-05-30   

    nationality                                             url  
0       British     http://en.wikipedia.org/wiki/Lewis_Hamilton  
1        German      http://en.wikipedia.org/wiki/Nick_Heidfeld  
2        German       http://en.wikipedia.org/wiki/Nico_Rosberg  
3       Spanish    http://en.wikipedia.org/wiki/Fernando_Alonso  
4       Finnish  http://en.wikipedia.org/wiki/Heikki_Kovalainen  
..          ...                                             ...  
849     British        http://en.wikipedia.org/wiki/Jack_Aitken  
850    Japanese       http://en.wikipedia.org/wiki/Yuki_Tsunoda  
851     Russian     http://en.wikipedia.org/wiki/Nikita_Mazepin  
852      German    http://en.wikipedia.org/wiki/Mick_Schumacher  
853     Chinese       https://en.wikipedia.org/wiki/Guanyu_Zhou  

[854 rows x 9 columns]
In [6]:
import plotly.graph_objects as go

fig2 = go.Figure(data=[go.Sankey(
    node=dict(
        pad=15,
        thickness=20,
        line=dict(color="black", width=2),
        label=["Alain Prost", "Ayrton Senna", "Juan Manuel Fangio", "Lewis Hamilton", "Michael Schumacher", "Sebastian Vettel",
               "Finished race", "Race not finished", "Lapped",
               "Accident", "Mechanical issue", "Wheel problem", "Technical/electronic issue", "Not classified"],
        color="white",
        # fixed positions
        x=[0, 0, 0, 0, 0, 0, 0.5, 0.5, 0.5, 0.9, 0.9, 0.9, 0.9, 0.9],
        y=[0, 0.2, 0.4, 0.6, 0.8, 1, 0.3, 0.9, 0.7, 0.2, 0.4, 0.6, 0.8, 1]
    ),
    link=dict(
        source=[0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
        target=[6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13, 9, 10, 11, 12, 13],
        value=[119, 85, 37, 251, 219, 213, 63, 62, 14, 26, 68, 42, 18, 12, 7, 9, 18, 24, 20, 27, 0, 13, 3, 22, 22, 3, 14, 1, 1, 11, 0, 2, 0, 13, 8, 2, 2, 1, 33, 22, 5, 6, 2, 17, 15, 4, 5, 1],
        color=['darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue', 'darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue', 'darkorange', 'orchid', 'limegreen', 'turquoise', 'red', 'mediumblue',
               'darkorange', 'darkorange', 'darkorange', 'darkorange', 'darkorange', 'orchid', 'orchid', 'orchid', 'orchid', 'orchid', 
               'limegreen', 'limegreen', 'limegreen', 'limegreen', 'limegreen', 'turquoise', 'turquoise', 'turquoise', 'turquoise', 'turquoise', 
               'red', 'red', 'red', 'red', 'red', 'mediumblue', 'mediumblue', 'mediumblue', 'mediumblue', 'mediumblue']
    ),
)],
    layout=dict(
        title=dict(
            text='', 
            font=dict(size=17, family='Georgia'),
            x=0,
            y=0.98
        ),
    )
)

fig2
In [7]:
fig3 = go.Figure(data=[go.Sankey(
    node = dict(
      pad = 15,
      thickness = 20,
      line = dict(color = "black", width = 2),
      label = ["Lewis Hamilton", "Sebastian Vettel",
               "Race finished", "Race not finished", "Lapped", "Race finished", "Race not finished", "Lapped",
               "Accident", "Mechanical issue", "Wheel issue", "Technical/electronic issue", "Not classified",
               "Accident", "Mechanical issue", "Wheel issue", "Technical/electronic issue", "Not classified"],
      color = ["turquoise", "red", "turquoise", "turquoise", "turquoise", "red", "red", "red", "turquoise", "turquoise", "turquoise", "turquoise", "turquoise", "red", "red", "red", "red", "red"],
      # fixed positions
      x = [0, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9, 0.9],
      y = [0.2, 0.6, 0.1, 0.5, 0.35, 0.6, 0.9, 0.75, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
    ),
    link = dict(
      source = [0, 1, 0, 1, 0, 1,    3, 3, 3, 3, 3, 6, 6, 6, 6, 6], # indices correspond to labels
      target = [2, 5, 3, 6, 4, 7,    8, 9, 10, 11, 12, 13, 14, 15, 16, 17],
      value = [251, 219, 26, 68, 9, 18,     13, 8, 2, 2, 1, 33, 22, 5, 6, 2],
      #color = ['turquoise', 'red', 'turquoise', 'purple', 'turquoise', 'red', 
        #       'turquoise', 'turquoise', 'turquoise', 'turquoise', 'turquoise', 'red', 'red', 'red', 'red', 'red']
  ),)],
    layout = dict(
        title = dict(
        text = '', 
        font = dict(size = 17, family = 'Georgia'),
        x=0,
        y=0.98
        ),
    )
    )

fig3
In [8]:
circuits = pd.read_csv('circuits.csv')
print(circuits)
    circuitId   circuitRef                                  name  \
0           1  albert_park        Albert Park Grand Prix Circuit   
1           2       sepang          Sepang International Circuit   
2           3      bahrain         Bahrain International Circuit   
3           4    catalunya        Circuit de Barcelona-Catalunya   
4           5     istanbul                         Istanbul Park   
..        ...          ...                                   ...   
74         75     portimao    Autódromo Internacional do Algarve   
75         76      mugello  Autodromo Internazionale del Mugello   
76         77       jeddah                 Jeddah Street Circuit   
77         78       losail          Losail International Circuit   
78         79        miami         Miami International Autodrome   

        location        country       lat        lng  alt  \
0      Melbourne      Australia -37.84970  144.96800   10   
1   Kuala Lumpur       Malaysia   2.76083  101.73800   18   
2         Sakhir        Bahrain  26.03250   50.51060    7   
3       Montmeló          Spain  41.57000    2.26111  109   
4       Istanbul         Turkey  40.95170   29.40500  130   
..           ...            ...       ...        ...  ...   
74      Portimão       Portugal  37.22700   -8.62670  108   
75       Mugello          Italy  43.99750   11.37190  255   
76        Jeddah   Saudi Arabia  21.54330   39.17280   15   
77     Al Daayen          Qatar  25.49000   51.45420   \N   
78         Miami  United States  25.95810  -80.23890   \N   

                                                  url  
0   http://en.wikipedia.org/wiki/Melbourne_Grand_P...  
1   http://en.wikipedia.org/wiki/Sepang_Internatio...  
2   http://en.wikipedia.org/wiki/Bahrain_Internati...  
3   http://en.wikipedia.org/wiki/Circuit_de_Barcel...  
4          http://en.wikipedia.org/wiki/Istanbul_Park  
..                                                ...  
74  http://en.wikipedia.org/wiki/Algarve_Internati...  
75       http://en.wikipedia.org/wiki/Mugello_Circuit  
76  http://en.wikipedia.org/wiki/Jeddah_Street_Cir...  
77  http://en.wikipedia.org/wiki/Losail_Internatio...  
78  https://en.wikipedia.org/wiki/Miami_Internatio...  

[79 rows x 9 columns]
In [9]:
import pandas as pd
import altair as alt
import altair_saver

# Load the circuits data
circuits = pd.read_csv('circuits.csv')

# Filter out circuits with missing latitude or longitude values
circuits = circuits[(circuits['lat'] != '\\N') & (circuits['lng'] != '\\N')]

# Convert latitude and longitude columns to float
circuits['lat'] = circuits['lat'].astype(float)
circuits['lng'] = circuits['lng'].astype(float)

# Define the geoplot for circuits
geoplot_circuits = alt.Chart(circuits).mark_circle().encode(
    longitude='lng:Q',
    latitude='lat:Q',
    size=alt.value(100),  # Size of the circles
    color=alt.Color('country:N', legend=None),  # Color by country
    tooltip=['name', 'country']  # Display circuit name and country on hover
).properties(
    width=800,
    height=400,
    title='Formula 1 Circuits Around the World'
).project(
    type='mercator'  # Projection type
)

# Define the world map
world_map = alt.Chart(alt.topo_feature('https://vega.github.io/vega-datasets/data/world-110m.json', 'countries')).mark_geoshape(
    fill='lightgray',
    stroke='white'
).properties(
    width=800,
    height=400
)

# Combine the geoplot of circuits and the world map
combined_plot = (world_map + geoplot_circuits)

combined_plot
Out[9]:
In [10]:
constructor_results = pd.read_csv('constructor_results.csv')
constructors = pd.read_csv('constructors.csv')
races = pd.read_csv('races.csv')
print(constructor_results)
print(races)
print(constructors)
       constructorResultsId  raceId  constructorId  points status
0                         1      18              1    14.0     \N
1                         2      18              2     8.0     \N
2                         3      18              3     9.0     \N
3                         4      18              4     5.0     \N
4                         5      18              5     2.0     \N
...                     ...     ...            ...     ...    ...
11945                 16445    1073            214     6.0     \N
11946                 16446    1073            117     0.0     \N
11947                 16447    1073            210     0.0     \N
11948                 16448    1073              3     0.0     \N
11949                 16449    1073             51     0.0     \N

[11950 rows x 5 columns]
      raceId  year  round  circuitId                      name        date  \
0          1  2009      1          1     Australian Grand Prix  2009-03-29   
1          2  2009      2          2      Malaysian Grand Prix  2009-04-05   
2          3  2009      3         17        Chinese Grand Prix  2009-04-19   
3          4  2009      4          3        Bahrain Grand Prix  2009-04-26   
4          5  2009      5          4        Spanish Grand Prix  2009-05-10   
...      ...   ...    ...        ...                       ...         ...   
1075    1092  2022     19         22       Japanese Grand Prix  2022-10-09   
1076    1093  2022     20         69  United States Grand Prix  2022-10-23   
1077    1094  2022     21         32    Mexico City Grand Prix  2022-10-30   
1078    1095  2022     22         18      São Paulo Grand Prix  2022-11-13   
1079    1096  2022     23         24      Abu Dhabi Grand Prix  2022-11-20   

          time                                                url  
0     06:00:00  http://en.wikipedia.org/wiki/2009_Australian_G...  
1     09:00:00  http://en.wikipedia.org/wiki/2009_Malaysian_Gr...  
2     07:00:00  http://en.wikipedia.org/wiki/2009_Chinese_Gran...  
3     12:00:00  http://en.wikipedia.org/wiki/2009_Bahrain_Gran...  
4     12:00:00  http://en.wikipedia.org/wiki/2009_Spanish_Gran...  
...        ...                                                ...  
1075  05:10:00  https://en.wikipedia.org/wiki/2022_Japanese_Gr...  
1076  19:00:00  https://en.wikipedia.org/wiki/2022_United_Stat...  
1077  19:00:00  https://en.wikipedia.org/wiki/2022_Mexican_Gra...  
1078  17:00:00  https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...  
1079  13:00:00  https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...  

[1080 rows x 8 columns]
     constructorId constructorRef            name nationality  \
0                1        mclaren         McLaren     British   
1                2     bmw_sauber      BMW Sauber      German   
2                3       williams        Williams     British   
3                4        renault         Renault      French   
4                5     toro_rosso      Toro Rosso     Italian   
..             ...            ...             ...         ...   
206            209          manor  Manor Marussia     British   
207            210           haas    Haas F1 Team    American   
208            211   racing_point    Racing Point     British   
209            213     alphatauri      AlphaTauri     Italian   
210            214         alpine  Alpine F1 Team      French   

                                                   url  
0                 http://en.wikipedia.org/wiki/McLaren  
1              http://en.wikipedia.org/wiki/BMW_Sauber  
2    http://en.wikipedia.org/wiki/Williams_Grand_Pr...  
3    http://en.wikipedia.org/wiki/Renault_in_Formul...  
4     http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso  
..                                                 ...  
206      http://en.wikipedia.org/wiki/Manor_Motorsport  
207          http://en.wikipedia.org/wiki/Haas_F1_Team  
208  http://en.wikipedia.org/wiki/Racing_Point_F1_Team  
209   http://en.wikipedia.org/wiki/Scuderia_AlphaTauri  
210        http://en.wikipedia.org/wiki/Alpine_F1_Team  

[211 rows x 5 columns]
In [11]:
import pandas as pd
import altair as alt

constructor_results = pd.read_csv('constructor_results.csv')
constructors = pd.read_csv('constructors.csv')

# Merge dataframes to get constructor names and points
merged_data = pd.merge(constructor_results, constructors, on='constructorId')

# Sum up the points for each constructor
constructor_points = merged_data.groupby('name')['points'].sum().reset_index()

# Sort the constructors by total points
constructor_points_sorted = constructor_points.sort_values(by='points', ascending=False)

# Create bar chart
bar_chart = alt.Chart(constructor_points_sorted.head(10)).mark_bar().encode(
    x=alt.X('points:Q', axis=alt.Axis(title='Total Points')),
    y=alt.Y('name:N', sort='-x', axis=alt.Axis(title='Constructor')),
    color=alt.Color('name:N', legend=None),
    tooltip=['name', 'points']
).properties(
    width=800,
    height=400,
    title='Top 10 Constructors by Total Points'
)

bar_chart
Out[11]:
In [12]:
import pandas as pd
import altair as alt

# Assuming you have already loaded the data into DataFrames
# constructor_results = pd.read_csv('constructor_results.csv')
# constructors = pd.read_csv('constructors.csv')
# races = pd.read_csv('races.csv')

# Merge constructor_results with races to get the year information
merged_data = pd.merge(constructor_results, races, on='raceId')

# Merge the merged_data with constructors to get the constructor names
merged_data = pd.merge(merged_data, constructors, on='constructorId')
print(merged_data)
       constructorResultsId  raceId  constructorId  points status  year  \
0                         1      18              1    14.0     \N  2008   
1                         2      18              2     8.0     \N  2008   
2                         3      18              3     9.0     \N  2008   
3                         4      18              4     5.0     \N  2008   
4                         5      18              5     2.0     \N  2008   
...                     ...     ...            ...     ...    ...   ...   
11945                 16445    1073            214     6.0     \N  2021   
11946                 16446    1073            117     0.0     \N  2021   
11947                 16447    1073            210     0.0     \N  2021   
11948                 16448    1073              3     0.0     \N  2021   
11949                 16449    1073             51     0.0     \N  2021   

       round  circuitId                 name_x        date      time  \
0          1          1  Australian Grand Prix  2008-03-16  04:30:00   
1          1          1  Australian Grand Prix  2008-03-16  04:30:00   
2          1          1  Australian Grand Prix  2008-03-16  04:30:00   
3          1          1  Australian Grand Prix  2008-03-16  04:30:00   
4          1          1  Australian Grand Prix  2008-03-16  04:30:00   
...      ...        ...                    ...         ...       ...   
11945     22         24   Abu Dhabi Grand Prix  2021-12-12  13:00:00   
11946     22         24   Abu Dhabi Grand Prix  2021-12-12  13:00:00   
11947     22         24   Abu Dhabi Grand Prix  2021-12-12  13:00:00   
11948     22         24   Abu Dhabi Grand Prix  2021-12-12  13:00:00   
11949     22         24   Abu Dhabi Grand Prix  2021-12-12  13:00:00   

                                                   url_x constructorRef  \
0      http://en.wikipedia.org/wiki/2008_Australian_G...        mclaren   
1      http://en.wikipedia.org/wiki/2008_Australian_G...     bmw_sauber   
2      http://en.wikipedia.org/wiki/2008_Australian_G...       williams   
3      http://en.wikipedia.org/wiki/2008_Australian_G...        renault   
4      http://en.wikipedia.org/wiki/2008_Australian_G...     toro_rosso   
...                                                  ...            ...   
11945  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...         alpine   
11946  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...   aston_martin   
11947  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...           haas   
11948  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...       williams   
11949  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...           alfa   

               name_y nationality  \
0             McLaren     British   
1          BMW Sauber      German   
2            Williams     British   
3             Renault      French   
4          Toro Rosso     Italian   
...               ...         ...   
11945  Alpine F1 Team      French   
11946    Aston Martin     British   
11947    Haas F1 Team    American   
11948        Williams     British   
11949      Alfa Romeo       Swiss   

                                                   url_y  
0                   http://en.wikipedia.org/wiki/McLaren  
1                http://en.wikipedia.org/wiki/BMW_Sauber  
2      http://en.wikipedia.org/wiki/Williams_Grand_Pr...  
3      http://en.wikipedia.org/wiki/Renault_in_Formul...  
4       http://en.wikipedia.org/wiki/Scuderia_Toro_Rosso  
...                                                  ...  
11945        http://en.wikipedia.org/wiki/Alpine_F1_Team  
11946  http://en.wikipedia.org/wiki/Aston_Martin_in_F...  
11947          http://en.wikipedia.org/wiki/Haas_F1_Team  
11948  http://en.wikipedia.org/wiki/Williams_Grand_Pr...  
11949  http://en.wikipedia.org/wiki/Alfa_Romeo_in_For...  

[11950 rows x 16 columns]
In [13]:
# Group by year and find the maximum points
max_points_per_year = merged_data.groupby('year')['points'].max().reset_index()

# Find the corresponding constructor for each maximum points entry
winning_constructors = merged_data.loc[merged_data.groupby('year')['points'].idxmax(), ['year', 'name_y']].reset_index(drop=True)

# Merge the maximum points and winning constructors data
max_points_per_year = pd.merge(max_points_per_year, winning_constructors, on='year')

# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line().encode(
    x='year:O',
    y='points:Q',
    color=alt.value('blue'),  # Fixed color for the line
    tooltip=['year', 'points', 'name_y']  # Show year, points, and name of the winning constructor on hover
).properties(
    width=600,
    height=400,
    title="Maximum Points Scored by Any Constructor Each Year"
)

# Filter out the data where Ferrari has won
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']

# Annotate Ferrari wins
point_annotations = alt.Chart(ferrari_wins).mark_text(
    align='left',
    baseline='middle',
    dx=5,  # Adjust text position
    dy=-10,  # Adjust text position
    fontSize=10  # Adjust text size
).encode(
    x='year:O',
    y='points:Q',
    text=alt.value('Ferrari')  # Annotate with 'Ferrari'
)

line_chart + point_annotations
Out[13]:
In [14]:
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='red').encode(
    x='year:O',
    y='points:Q',
    tooltip=['year', 'points', 'name_y']  # Show year, points, and name of the winning constructor on hover
).properties(
    width=600,
    height=400,
    title="Maximum Points Scored by Any Constructor Each Year"
)

# Filter out the data where Ferrari has won
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']

# Annotate Ferrari wins with circle markers in red
point_annotations = alt.Chart(ferrari_wins).mark_circle(color='red', size=100).encode(
    x='year:O',
    y='points:Q',
)

line_chart + point_annotations
Out[14]:
In [15]:
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='turquoise').encode(
    x='year:O',
    y='points:Q',
    tooltip=['year', 'points', 'name_y']  # Show year, points, and name of the winning constructor on hover
).properties(
    width=600,
    height=400,
    title="Maximum Points Scored by Any Constructor Each Year"
)

# Filter out the data where Mercedes has won
mercedes_wins = max_points_per_year[max_points_per_year['name_y'] == 'Mercedes']

# Annotate Mercedes wins with circle markers in turquoise
point_annotations = alt.Chart(mercedes_wins).mark_circle(color='turquoise', size=100).encode(
    x='year:O',
    y='points:Q',
)

line_chart + point_annotations
Out[15]:
In [16]:
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='navy').encode(
    x='year:O',
    y='points:Q',
    tooltip=['year', 'points', 'name_y']  # Show year, points, and name of the winning constructor on hover
).properties(
    width=600,
    height=400,
    title="Maximum Points Scored by Any Constructor Each Year"
)

# Filter out the data where Red Bull has won
redbull_wins = max_points_per_year[max_points_per_year['name_y'] == 'Red Bull']

# Annotate Red Bull wins with circle markers in navy blue
point_annotations = alt.Chart(redbull_wins).mark_circle(color='navy', size=100).encode(
    x='year:O',
    y='points:Q',
)

line_chart + point_annotations
Out[16]:
In [17]:
# Plot the line graph
line_chart = alt.Chart(max_points_per_year).mark_line(color='navy').encode(
    x='year:O',
    y='points:Q',
    tooltip=['year', 'points', 'name_y']  # Show year, points, and name of the winning constructor on hover
).properties(
    width=600,
    height=400,
    title="Maximum Points Scored by Any Constructor Each Year"
)

# Filter out the data where Mercedes, Red Bull, and Ferrari have won
mercedes_wins = max_points_per_year[max_points_per_year['name_y'] == 'Mercedes']
redbull_wins = max_points_per_year[max_points_per_year['name_y'] == 'Red Bull']
ferrari_wins = max_points_per_year[max_points_per_year['name_y'] == 'Ferrari']

# Annotate Mercedes wins with circle markers
mercedes_markers = alt.Chart(mercedes_wins).mark_circle(color='turquoise', size=100).encode(
    x='year:O',
    y='points:Q',
)

# Annotate Red Bull wins with circle markers
redbull_markers = alt.Chart(redbull_wins).mark_circle(color='navy', size=100).encode(
    x='year:O',
    y='points:Q',
)

# Annotate Ferrari wins with circle markers
ferrari_markers = alt.Chart(ferrari_wins).mark_circle(color='red', size=100).encode(
    x='year:O',
    y='points:Q',
)

line_chart + mercedes_markers + redbull_markers + ferrari_markers
Out[17]:
In [18]:
print(drivers)
print(results)
print(races)
     driverId        driverRef number code  forename     surname         dob  \
0           1         hamilton     44  HAM     Lewis    Hamilton  1985-01-07   
1           2         heidfeld     \N  HEI      Nick    Heidfeld  1977-05-10   
2           3          rosberg      6  ROS      Nico     Rosberg  1985-06-27   
3           4           alonso     14  ALO  Fernando      Alonso  1981-07-29   
4           5       kovalainen     \N  KOV    Heikki  Kovalainen  1981-10-19   
..        ...              ...    ...  ...       ...         ...         ...   
849       851           aitken     89  AIT      Jack      Aitken  1995-09-23   
850       852          tsunoda     22  TSU      Yuki     Tsunoda  2000-05-11   
851       853          mazepin      9  MAZ    Nikita     Mazepin  1999-03-02   
852       854  mick_schumacher     47  MSC      Mick  Schumacher  1999-03-22   
853       855             zhou     \N  ZHO    Guanyu        Zhou  1999-05-30   

    nationality                                             url  
0       British     http://en.wikipedia.org/wiki/Lewis_Hamilton  
1        German      http://en.wikipedia.org/wiki/Nick_Heidfeld  
2        German       http://en.wikipedia.org/wiki/Nico_Rosberg  
3       Spanish    http://en.wikipedia.org/wiki/Fernando_Alonso  
4       Finnish  http://en.wikipedia.org/wiki/Heikki_Kovalainen  
..          ...                                             ...  
849     British        http://en.wikipedia.org/wiki/Jack_Aitken  
850    Japanese       http://en.wikipedia.org/wiki/Yuki_Tsunoda  
851     Russian     http://en.wikipedia.org/wiki/Nikita_Mazepin  
852      German    http://en.wikipedia.org/wiki/Mick_Schumacher  
853     Chinese       https://en.wikipedia.org/wiki/Guanyu_Zhou  

[854 rows x 9 columns]
       resultId  raceId  driverId  constructorId number  grid position  \
0             1      18         1              1     22     1        1   
1             2      18         2              2      3     5        2   
2             3      18         3              3      7     7        3   
3             4      18         4              4      5    11        4   
4             5      18         5              1     23     3        5   
...         ...     ...       ...            ...    ...   ...      ...   
25394     25400    1073       815              9     11     4       15   
25395     25401    1073       849              3      6    16       \N   
25396     25402    1073       841             51     99    14       \N   
25397     25403    1073       847              3     63    17       \N   
25398     25404    1073         8             51      7    18       \N   

      positionText  positionOrder  points  laps         time milliseconds  \
0                1              1    10.0    58  1:34:50.616      5690616   
1                2              2     8.0    58       +5.478      5696094   
2                3              3     6.0    58       +8.163      5698779   
3                4              4     5.0    58      +17.181      5707797   
4                5              5     4.0    58      +18.014      5708630   
...            ...            ...     ...   ...          ...          ...   
25394           15             15     0.0    55           \N           \N   
25395            R             16     0.0    50           \N           \N   
25396            R             17     0.0    33           \N           \N   
25397            R             18     0.0    26           \N           \N   
25398            R             19     0.0    25           \N           \N   

      fastestLap rank fastestLapTime fastestLapSpeed  statusId  
0             39    2       1:27.452         218.300         1  
1             41    3       1:27.739         217.586         1  
2             41    5       1:28.090         216.719         1  
3             58    7       1:28.603         215.464         1  
4             43    1       1:27.418         218.385         1  
...          ...  ...            ...             ...       ...  
25394         51    2       1:26.419         219.993         5  
25395         30   15       1:29.293         212.912         3  
25396         33   16       1:29.442         212.557         6  
25397         23   19       1:30.647         209.732         6  
25398         23   18       1:29.698         211.951        23  

[25399 rows x 18 columns]
      raceId  year  round  circuitId                      name        date  \
0          1  2009      1          1     Australian Grand Prix  2009-03-29   
1          2  2009      2          2      Malaysian Grand Prix  2009-04-05   
2          3  2009      3         17        Chinese Grand Prix  2009-04-19   
3          4  2009      4          3        Bahrain Grand Prix  2009-04-26   
4          5  2009      5          4        Spanish Grand Prix  2009-05-10   
...      ...   ...    ...        ...                       ...         ...   
1075    1092  2022     19         22       Japanese Grand Prix  2022-10-09   
1076    1093  2022     20         69  United States Grand Prix  2022-10-23   
1077    1094  2022     21         32    Mexico City Grand Prix  2022-10-30   
1078    1095  2022     22         18      São Paulo Grand Prix  2022-11-13   
1079    1096  2022     23         24      Abu Dhabi Grand Prix  2022-11-20   

          time                                                url  
0     06:00:00  http://en.wikipedia.org/wiki/2009_Australian_G...  
1     09:00:00  http://en.wikipedia.org/wiki/2009_Malaysian_Gr...  
2     07:00:00  http://en.wikipedia.org/wiki/2009_Chinese_Gran...  
3     12:00:00  http://en.wikipedia.org/wiki/2009_Bahrain_Gran...  
4     12:00:00  http://en.wikipedia.org/wiki/2009_Spanish_Gran...  
...        ...                                                ...  
1075  05:10:00  https://en.wikipedia.org/wiki/2022_Japanese_Gr...  
1076  19:00:00  https://en.wikipedia.org/wiki/2022_United_Stat...  
1077  19:00:00  https://en.wikipedia.org/wiki/2022_Mexican_Gra...  
1078  17:00:00  https://en.wikipedia.org/wiki/2022_S%C3%A3o_Pa...  
1079  13:00:00  https://en.wikipedia.org/wiki/2022_Abu_Dhabi_G...  

[1080 rows x 8 columns]
In [19]:
# Merge drivers and results on driverId
merged_driver_results = pd.merge(drivers, results, on='driverId')

# Merge the above dataframe with races on raceId
merged_data = pd.merge(merged_driver_results, races, on='raceId')

print(merged_data)
       driverId        driverRef number_x code forename     surname  \
0             1         hamilton       44  HAM    Lewis    Hamilton   
1             1         hamilton       44  HAM    Lewis    Hamilton   
2             1         hamilton       44  HAM    Lewis    Hamilton   
3             1         hamilton       44  HAM    Lewis    Hamilton   
4             1         hamilton       44  HAM    Lewis    Hamilton   
...         ...              ...      ...  ...      ...         ...   
25394       854  mick_schumacher       47  MSC     Mick  Schumacher   
25395       854  mick_schumacher       47  MSC     Mick  Schumacher   
25396       854  mick_schumacher       47  MSC     Mick  Schumacher   
25397       854  mick_schumacher       47  MSC     Mick  Schumacher   
25398       854  mick_schumacher       47  MSC     Mick  Schumacher   

              dob nationality                                         url_x  \
0      1985-01-07     British   http://en.wikipedia.org/wiki/Lewis_Hamilton   
1      1985-01-07     British   http://en.wikipedia.org/wiki/Lewis_Hamilton   
2      1985-01-07     British   http://en.wikipedia.org/wiki/Lewis_Hamilton   
3      1985-01-07     British   http://en.wikipedia.org/wiki/Lewis_Hamilton   
4      1985-01-07     British   http://en.wikipedia.org/wiki/Lewis_Hamilton   
...           ...         ...                                           ...   
25394  1999-03-22      German  http://en.wikipedia.org/wiki/Mick_Schumacher   
25395  1999-03-22      German  http://en.wikipedia.org/wiki/Mick_Schumacher   
25396  1999-03-22      German  http://en.wikipedia.org/wiki/Mick_Schumacher   
25397  1999-03-22      German  http://en.wikipedia.org/wiki/Mick_Schumacher   
25398  1999-03-22      German  http://en.wikipedia.org/wiki/Mick_Schumacher   

       resultId  ...  fastestLapTime  fastestLapSpeed statusId  year round  \
0             1  ...        1:27.452          218.300        1  2008     1   
1            27  ...        1:35.462          209.033        1  2008     2   
2            57  ...        1:35.520          203.969       11  2008     3   
3            69  ...        1:22.017          204.323        1  2008     4   
4            90  ...        1:26.529          222.085        1  2008     5   
...         ...  ...             ...              ...      ...   ...   ...   
25394     25324  ...              \N               \N        4  2021    18   
25395     25343  ...        1:13.793          210.215       12  2021    19   
25396     25361  ...        1:28.412          219.065       11  2021    20   
25397     25385  ...        1:36.043          231.421        3  2021    21   
25398     25399  ...        1:29.457          212.522       11  2021    22   

      circuitId                      name        date    time_y  \
0             1     Australian Grand Prix  2008-03-16  04:30:00   
1             2      Malaysian Grand Prix  2008-03-23  07:00:00   
2             3        Bahrain Grand Prix  2008-04-06  11:30:00   
3             4        Spanish Grand Prix  2008-04-27  12:00:00   
4             5        Turkish Grand Prix  2008-05-11  12:00:00   
...         ...                       ...         ...       ...   
25394        32    Mexico City Grand Prix  2021-11-07  19:00:00   
25395        18      São Paulo Grand Prix  2021-11-14  17:00:00   
25396        78          Qatar Grand Prix  2021-11-21  14:00:00   
25397        77  Saudi Arabian Grand Prix  2021-12-05  17:30:00   
25398        24      Abu Dhabi Grand Prix  2021-12-12  13:00:00   

                                                   url_y  
0      http://en.wikipedia.org/wiki/2008_Australian_G...  
1      http://en.wikipedia.org/wiki/2008_Malaysian_Gr...  
2      http://en.wikipedia.org/wiki/2008_Bahrain_Gran...  
3      http://en.wikipedia.org/wiki/2008_Spanish_Gran...  
4      http://en.wikipedia.org/wiki/2008_Turkish_Gran...  
...                                                  ...  
25394  http://en.wikipedia.org/wiki/2021_Mexican_Gran...  
25395  http://en.wikipedia.org/wiki/São_Paulo_Grand_Prix  
25396  http://en.wikipedia.org/wiki/2021_Qatar_Grand_...  
25397  http://en.wikipedia.org/wiki/2021_Saudi_Arabia...  
25398  http://en.wikipedia.org/wiki/2021_Abu_Dhabi_Gr...  

[25399 rows x 33 columns]
In [20]:
import altair as alt

# Filter data for Lewis Hamilton and Sebastian Vettel
hamilton_data = merged_data[merged_data['driverRef'] == 'hamilton']
vettel_data = merged_data[merged_data['driverRef'] == 'vettel']

# Aggregate points per year for both drivers
hamilton_points = hamilton_data.groupby('year')['points'].sum().reset_index()
vettel_points = vettel_data.groupby('year')['points'].sum().reset_index()

# Rename columns for clarity
hamilton_points.columns = ['year', 'Lewis Hamilton']
vettel_points.columns = ['year', 'Sebastian Vettel']

# Merge the data for plotting
merged_points = pd.merge(hamilton_points, vettel_points, on='year')
print(merged_points)
    year  Lewis Hamilton  Sebastian Vettel
0   2007           109.0               6.0
1   2008            98.0              35.0
2   2009            49.0              84.0
3   2010           240.0             256.0
4   2011           227.0             392.0
5   2012           190.0             281.0
6   2013           189.0             397.0
7   2014           384.0             167.0
8   2015           381.0             278.0
9   2016           380.0             212.0
10  2017           363.0             317.0
11  2018           408.0             320.0
12  2019           413.0             240.0
13  2020           347.0              33.0
14  2021           387.5              43.0
In [21]:
# Melt the DataFrame for Altair plotting
melted_points = merged_points.melt('year', var_name='driver', value_name='points')

# Create the bar chart
bar_chart = alt.Chart(melted_points).mark_bar().encode(
    x='year:O',
    y='points:Q',
    color='driver:N'
).properties(
    width=600,
    height=400,
    title="Comparison of Total Points: Lewis Hamilton vs Sebastian Vettel"
)

bar_chart
Out[21]:
In [22]:
melted_points = merged_points.melt('year', var_name='driver', value_name='points')

# Create the bar chart
bar_chart = alt.Chart(melted_points).mark_bar().encode(
    x='year:O',
    y='points:Q',
    color=alt.Color('driver:N', scale=alt.Scale(domain=['Lewis Hamilton', 'Sebastian Vettel'], range=['turquoise', 'red']))
).properties(
    width=600,
    height=400,
    title="Comparison of Total Points: Lewis Hamilton vs Sebastian Vettel"
)

bar_chart
Out[22]: